################################################################################################
####### Volha Charnysh. 2024. Uprooted: How post-WWII Population Transfers Remade Europe. CUP
####### Chapter 5: Replication for analysis at the county level part 1 
####### R version 3.6.3 (2020-02-29) -- "Holding the Windsock"
####### Platform: x86_64-apple-darwin15.6.0 (64-bit)
############################################################################


rm(list = ls())
library(ggplot2)
library(openxlsx)
library(rddensity)
library(rdrobust)
library(rdlocrand)
library(xtable)
library(rdd)
library(texreg)

#########################################################################################
##### Table A.5 The effect of displacement on admin capacity and placebo outcomes  ######
#########################################################################################

data1950distFin<-read.xlsx("chapter5/chapter5_replication_counties.xlsx")

data1950distFin<-subset(data1950distFin, (Partition=="ZZ" | Partition=="PRUS")) #   

data1950distFin$PozaRolnEcActive<-data1950distFin$CzynniRolnictwo/data1950distFin$CzynniZawodowo 
data1950distFin$PrzemyslEcActive<-data1950distFin$CzynniPrzemysl/data1950distFin$CzynniZawodowo 

data1950distFin$PozaRolnPct<-data1950distFin$LudnoscUtrzRoln/data1950distFin$Ludnosc 
data1950distFin$AdminPThous<-data1950distFin$CzynniAdministracja/(data1950distFin$Ludnosc/1000) 
data1950distFin$CzGospKomThous<-data1950distFin$CzynniGospKom/(data1950distFin$Ludnosc/1000) 

data1950distFin$StateServiceTot<-(data1950distFin$CzynniGospKom+data1950distFin$CzynniAdministracja)/(data1950distFin$Ludnosc/1000) 
data1950distFin$StateServiceEcActive<-(data1950distFin$CzynniGospKom+data1950distFin$CzynniAdministracja+data1950distFin$CzynniOchronaZdrowia)/data1950distFin$CzynniZawodowo 

summary(data1950distFin$StateServiceTot[data1950distFin$Partition=="ZZ"]) 
sd(data1950distFin$StateServiceTot[data1950distFin$Partition=="ZZ"]) 

#Calculate area: 
data1950distFin$areaKm2<-data1950distFin$Ludnosc/data1950distFin$Na1km
data1950distFin$AdminPArea<-data1950distFin$CzynniAdministracja/data1950distFin$areaKm2 
data1950distFin$ShareMen<-data1950distFin$Men/data1950distFin$Ludnosc 

X<-ifelse(data1950distFin$Partition=="ZZ", data1950distFin$distance, -data1950distFin$distance)/1000
Z<-cbind(data1950distFin$Miasto, data1950distFin$lat, data1950distFin$lon, data1950distFin$areaKm2, data1950distFin$latlon) 

newframe <- cbind(data1950distFin$AdminPThous, data1950distFin$CzGospKomThous, data1950distFin$StateServiceTot, data1950distFin$Na1km, data1950distFin$ShareMen,data1950distFin$PozaRolnEcActive, data1950distFin$LiberationMoYearNo, data1950distFin$AdminPArea, X, Z)
newframe <- as.data.frame(newframe)
colnames(newframe) <- c("AdminPThous", "CzGospKomThous","StateServiceTot", "Na1km", "ShareMen", "PozaRolnEcActive","LiberationMoYearNo","AdminPArea", "X", "Miasto",  "Lat", "Lon", "areaKm2")

#### Table A5. Results without covariates (top panel)
rdmod1a <- RDestimate(AdminPThous ~ X , data = newframe, se.type = "HC1")
summary(rdmod1a)

rdmod2a<- RDestimate(CzGospKomThous ~ X, data = newframe, se.type = "HC1")
summary(rdmod2a) 

rdmod3a <- RDestimate(StateServiceTot ~ X, data = newframe, se.type = "HC1")
summary(rdmod3a)

rdmod4a <- RDestimate(Na1km ~ X, data = newframe, se.type = "HC1")
summary(rdmod4a) 

rdmod5a <- RDestimate(PozaRolnEcActive ~ X, data = newframe, se.type = "HC1", bw=10)
summary(rdmod5a) 

Coefficient<-c(round(rdmod1a[[3]][1],2), round(rdmod2a[[3]][1],2), round(rdmod3a[[3]][1],2), round(rdmod4a[[3]][1],2), round(rdmod5a[[3]][1],2))

SE<-c(round(rdmod1a[[5]][1],2), round(rdmod2a[[5]][1],2), round(rdmod3a[[5]][1],2), round(rdmod4a[[5]][1],2), round(rdmod5a[[5]][1],2))

Bandwidth<-c(round(rdmod1a[[4]][1], 2), round(rdmod2a[[4]][1], 2),round(rdmod3a[[4]][1], 2), round(rdmod4a[[4]][1], 2), round(rdmod5a[[4]][1], 2))

Observations<-c(rdmod1a[[8]][1], rdmod2a[[8]][1], rdmod3a[[8]][1], rdmod4a[[8]][1], rdmod5a[[8]][1])

tableA1a<-as.data.frame(rbind(Coefficient, SE, Observations, Bandwidth))
colnames(tableA1a)<-c("Officials","Municipal Services", "Officials+Mun Services", "People per 1 km2", "Share Outside Agriculture")
rownames(tableA1a)<-c("Coefficient", "Standard error", "Observations", "Bandwidth")

xtable(tableA1a,label = 'tab:rd.occupations', caption = 'Regression Discontinuity Analysis: Data from the 1950 census')



#### Table A5. Results with covariates (bottom panel) 
rdmod1 <- RDestimate(AdminPThous ~ X | Miasto+LiberationMoYearNo+Lat + Lon +areaKm2, data = newframe, se.type = "HC1")
summary(rdmod1)

rdmod2 <- RDestimate(CzGospKomThous ~ X|  Miasto+LiberationMoYearNo+Lat + Lon +areaKm2, data = newframe, se.type = "HC1", bw=10)
summary(rdmod2)  #this model works only if bandwidth is set manually. Otherwise error.

rdmod3 <- RDestimate(StateServiceTot ~ X | Miasto+LiberationMoYearNo+Lat + Lon +areaKm2, data = newframe, se.type = "HC1")
summary(rdmod3)

rdmod4 <- RDestimate(Na1km ~ X | Miasto+LiberationMoYearNo+Lat + Lon +areaKm2, data = newframe, se.type = "HC1")
summary(rdmod4) 

rdmod5 <- RDestimate(PozaRolnEcActive ~ X | Miasto+LiberationMoYearNo+Lat + Lon +areaKm2, data = newframe, se.type = "HC1", bw=10)
summary(rdmod5) 

Coefficient<-c(round(rdmod1[[3]][1],2), round(rdmod2[[3]][1],2), round(rdmod3[[3]][1],2), round(rdmod4[[3]][1],2), round(rdmod5[[3]][1],2))

SE<-c(round(rdmod1[[5]][1],2), round(rdmod2[[5]][1],2), round(rdmod3[[5]][1],2), round(rdmod4[[5]][1],2), round(rdmod5[[5]][1],2))

Bandwidth<-c(round(rdmod1[[4]][1], 2), round(rdmod2[[4]][1], 2),round(rdmod3[[4]][1], 2), round(rdmod4[[4]][1], 2), round(rdmod5[[4]][1], 2))

Observations<-c(rdmod1[[8]][1], rdmod2[[8]][1], rdmod3[[8]][1], rdmod4[[8]][1], rdmod5[[8]][1])

tableA1<-as.data.frame(rbind(Coefficient, SE, Observations, Bandwidth))
colnames(tableA1)<-c("Officials","Municipal Services", "Officials+Mun Services", "People per 1 km2", "Share Outside Agriculture")
rownames(tableA1)<-c("Coefficient", "Standard error", "Observations", "Bandwidth")

xtable(tableA1, label = 'tab:rd.occupations', caption = 'Regression Discontinuity Analysis: Data from the 1950 census')

#Figure 5.2:
plot1 <- function(){
  plot(rdmod1, range = c(-20, 20))
  title(main = "", xlab = "Non-resettled <-- (distance to the 1919 border, km) --> Resettled", ylab="Officials per 1000 people")
  abline(v = 0, col = "red")
}

plot1()

##############################################################
######### Table A6. ALTERNATIVE ESTITMATION STRATEGY: ########
##############################################################
Z<-cbind(data1950distFin$Miasto, data1950distFin$LiberationMoYearNo, data1950distFin$lat, data1950distFin$lon, data1950distFin$areaKm2) 


out1 = rdrobust(data1950distFin$AdminPThous, X,   covs=Z, vce="hc1", kernel = "triangular", p = 1, bwselect = "mserd", all=TRUE)  
summary(out1) 

out2= rdrobust(data1950distFin$CzGospKomThous, X, covs=Z, vce="hc1", kernel = "triangular", p = 1, bwselect = "mserd", all=TRUE)  
summary(out2) 

out3= rdrobust(data1950distFin$StateServiceTot, X, covs=Z, vce="hc1", kernel = "triangular", p = 1, bwselect = "mserd", all=TRUE) 
summary(out3) 

out4= rdrobust(data1950distFin$Na1km/1000, X, vce="hc1", kernel = "triangular", p = 1, bwselect = "mserd", scaleregul = 1, all=TRUE) 
summary(out4) 

out5 = rdrobust(data1950distFin$PozaRolnEcActive, X, covs=Z, vce="hc1",kernel = "triangular", p = 1, bwselect = "mserd", scaleregul = 1, all=TRUE) 
summary(out5) 

## Producing Table A.6. The effect of displacement on administrative capacity and placebo outcomes.
Coefficient<-c(round(out1$coef[1,],2), round(out2$coef[1,],2), round(out3$coef[1,],2), round(out4$coef[1,],2),  round(out5$coef[1,],2))
SE<-c(round(out1$se[1,],2), round(out2$se[1,],2), round(out3$se[1,],2), round(out4$se[1,],2), round(out5$se[1,],2))
CIs<-c(paste(round(out1$ci[3,1],2), round(out1$ci[3,2],2), sep=", "), paste(round(out2$ci[3,1],2), round(out2$ci[3,2],2), sep=", "), paste(round(out3$ci[3,1],2), round(out3$ci[3,2],2), sep=", "), paste(round(out4$ci[3,1],2), round(out4$ci[3,2],2), sep=", "), paste(round(out5$ci[3,1],2), round(out5$ci[3,2],2), sep=", "))
Observations<-c(sum(out1$N), sum(out2$N), sum(out3$N), sum(out4$N), sum(out5$N))

Kernel<-c(out1$k, out2$k, out3$k, out4$k, out5$k)
Polynomial<-c(out1$p,out2$p,out3$p,out4$p,out5$p)
BandwidthType<-c(out1$bwselect,out2$bwselect, out3$bwselect, out4$bwselect, out5$bwselect)
BandwidthKM<-c(round(out1$bws[1,1], 2), round(out2$bws[1,1], 2), round(out3$bws[1,1],2), round(out4$bws[1,1],2), round(out5$bws[1,1],2))
EffectiveNUntreated<-c(out1$N_h[1],out2$N_h[1], out3$N_h[1], out4$N_h[1], out5$N_h[1])
EffectiveNTreated<-c(out1$N_h[2],out2$N_h[2], out3$N_h[2], out4$N_h[2], out5$N_h[2])

#Note for some versions of RDrobust, code is EffectiveNUntreated<-c(out1$Nh[1], out2$Nh[1], out3$Nh[1], out4$Nh[1], out5$Nh[1]

tableA1<-as.data.frame(rbind(Coefficient, SE, CIs, Observations, Kernel, Polynomial, BandwidthType, BandwidthKM, EffectiveNTreated, EffectiveNUntreated))
colnames(tableA1)<-c("Officials","Municipal services", "Officials+Mun services", "Pop density", "Outside agriculture")
rownames(tableA1)<-c("Coefficient", "Standard error (conventional)", "Robust bias-corrected CIs", "Observations", "Kernel type", "Polynomial", "Bandwidth type", "MSE-optimal bandwidth", "Effective # treated", "Effective # untreated")

xtable(tableA1,label = 'tab:rd.occupations', caption = 'Regression Discontinuity Analysis: Data from the 1950 census')


#######################################################################################################
##### Replication for Table 5.2 Cultural diversity and admin capacity WITHIN the resettled region #####
#######################################################################################################

cse = function(reg) {
  rob = sqrt(diag(vcovHC(reg, type = "HC1")))
  return(rob)
}
hc1<-function(x) vcovHC(x, type="HC1")  


### This file uses same indicators as above, but contains additional information about the origins of the population. 
counties1950<-read.xlsx("chapter5/counties1950.xlsx")

##Calculate main variables: 
counties1950$AdminPThous<-counties1950$CzynniAdministracja/(counties1950$Ludnosc/1000)
counties1950$CzGospKomThous<-counties1950$CzynniGospKom/(counties1950$Ludnosc/1000) 

counties1950$ShareAut<-counties1950$w_tym_autochtony/counties1950$ludnosc_ogolem
counties1950$ShareMig<-counties1950$ogolem_naplywowa/counties1950$ludnosc_ogolem
counties1950$ShareUSSR<-counties1950$zagranice_ZSRR/counties1950$ludnosc_ogolem
counties1950$ShareOther<-counties1950$zagranica_inny_panstwa/counties1950$ludnosc_ogolem
counties1950$ShareCP<-counties1950$ShareMig-counties1950$ShareUSSR-counties1950$ShareOther

counties1950$Frac<-1-(counties1950$ShareAut^2+counties1950$ShareCP^2+counties1950$ShareUSSR^2+counties1950$ShareOther^2)

counties1950$ShareOver100ha<-counties1950$GospOver100ha/counties1950$GospOgolem
counties1950$SharePrzemysl1939<-counties1950$PrzemyslRzemioslo/counties1950$Population1939

names(counties1950)

####################################################################
######### Table 5.2: Cultural diversity and admin capacity #########
####################################################################
counties1950$Provinz<-as.character(counties1950$Provinz) #Missingness in this variable is only for units that are counted twice.

reg1<-lm(AdminPThous~Frac+log(ludnosc_ogolem), data=counties1950)

reg2<-lm(AdminPThous~Frac+ShareOver100ha+SharePrzemysl1939+Miasto+log(GermDistKM)+log(ludnosc_ogolem)+Provinz, data=counties1950)

reg3<-lm(CzGospKomThous~Frac+log(ludnosc_ogolem), data=counties1950)

reg4<-lm(CzGospKomThous~Frac+ShareOver100ha+SharePrzemysl1939+Miasto+log(GermDistKM)+log(ludnosc_ogolem)+Provinz, data=counties1950)

stateservices <- texreg(
  l = list(reg1, reg2, reg3, reg4),
  override.se=list(coeftest(reg1, vcov.=hc1)[,2], coeftest(reg2, vcov.=hc1)[,2], coeftest(reg3, vcov.=hc1)[,2], coeftest(reg4, vcov.=hc1)[,2]),
  override.pval=list(coeftest(reg1, vcov.=hc1)[,4], coeftest(reg2, vcov.=hc1)[,4], coeftest(reg3, vcov.=hc1)[,4], coeftest(reg4, vcov.=hc1)[,4]),     
  custom.coef.names=c("Fractionalization index", "Ln population","Share farms over 100 ha", "Share in industry", "City", "Ln distance to Germany"),
  dcolumn = TRUE,
  booktabs = TRUE,#
  stars = c(.01,.05,.1),
  label = "tab:AdminWithin",
  omit.coef = "(Intercept)|Provinz", 
  include.rsquared = FALSE
)

stateservices
